{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CNKI_Selenium"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from lxml.html import fromstring\n",
    "import time\n",
    "from random import random\n",
    "import requests\n",
    "import base64"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.desired_capabilities import DesiredCapabilities\n",
    "\n",
    "# Build the Chrome launch options before starting the browser session.\n",
    "opts = webdriver.ChromeOptions()\n",
    "# Works around the \"DevToolsActivePort file doesn't exist\" startup error.\n",
    "opts.add_argument('--no-sandbox')\n",
    "# Initial window size; Chrome expects WIDTH,HEIGHT separated by a comma.\n",
    "opts.add_argument('--window-size=1920,3000')\n",
    "# Recommended in Google's documentation to sidestep a GPU-related bug.\n",
    "opts.add_argument('--disable-gpu')\n",
    "# Hide scrollbars, which helps on some unusual page layouts.\n",
    "opts.add_argument('--hide-scrollbars')\n",
    "\n",
    "# `chrome_options=` is deprecated; `options=` is the supported keyword.\n",
    "driver = webdriver.Chrome(options=opts)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 如果使用校内网，直接登录[中国知网](https://www.cnki.net/)\n",
    "* 如果使用校外网，请登录[外部访问系统](http://fsso.cnki.net/)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 检查是否登录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.get(\"https://www.cnki.net/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# driver.find_element_by_xpath('//*[@id=\"headerBox\"]/div[1]/div/div/div[4]').click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 校内网_直接登录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# #校园网ip登录\n",
    "# element = driver.find_element_by_id('Button2')\n",
    "# element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'中山大学南方学院'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "element = driver.find_element_by_id('Ecp_loginShowName1')\n",
    "element.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 检查窗口位置"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* 出现多个窗口，检查窗口位置\n",
    "* 每一个窗口在driver中自动生成唯一的窗口id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 点击高级检索\n",
    "element = driver.find_elements_by_xpath('//div[@class=\"readvce\"]/a')[0]\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-32BA9F5F2308C4D6CDAD4F08EB1352E1',\n",
       " 'CDwindow-2D1C5D5C89222381E9EF876BD15997BB']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看窗口信息（现在打开了两个窗口）\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CDwindow-32BA9F5F2308C4D6CDAD4F08EB1352E1'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "driver.current_window_handle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clicking advanced search opened a second window; make it the active one.\n",
    "# `switch_to_window` is deprecated in favour of `switch_to.window`.\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 直接搜索栏添加索引词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "search_index =  {\"theme\": \"智慧物联网\", \"author\": \"\",\"literature\":\"\"}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 清空主题input\n",
    "driver.find_element_by_xpath('//*[@id=\"gradetxt\"]/dd[1]/div[2]/input').clear()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.find_element_by_xpath('//*[@id=\"gradetxt\"]/dd[1]/div[2]/input').send_keys(search_index['theme'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 清空作者input\n",
    "driver.find_element_by_xpath('//*[@id=\"gradetxt\"]/dd[2]/div[2]/input').clear()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.find_element_by_xpath('//*[@id=\"gradetxt\"]/dd[2]/div[2]/input').send_keys(search_index['author'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 清空文献来源input\n",
    "driver.find_element_by_xpath('//*[@id=\"gradetxt\"]/dd[3]/div[2]/input').clear()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.find_element_by_xpath('//*[@id=\"gradetxt\"]/dd[3]/div[2]/input').send_keys(search_index['literature'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_xpath('//input[@value=\"检索\"]')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 点击期刊检索以及选择期刊"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 点击期刊\n",
    "driver.find_element_by_xpath('//ul[@class=\"doctype-menus keji\"]/li/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "#点击CSSCI\n",
    "driver.find_element_by_xpath('//input[@key=\"CSI\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "#点击北大核心\n",
    "driver.find_element_by_xpath('//input[@key=\"HX\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.find_element_by_xpath('/html/body/div[2]/div/div[2]/div/div[1]/div[1]/div[2]/div[2]/input').click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 填写query\n",
    "* 可以在高级检索直接检索（只要不精确查找）\n",
    "* 建议专业检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 点击专业检索\n",
    "driver.find_element_by_name('majorSearch').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "AI_新媒体_query = '(TI=\"物联网\" and SU=\"人工智能\") OR (TI=\"AI\" and SU = \"科学\")'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "element = driver.find_element_by_xpath('//textarea')\n",
    "element.clear()\n",
    "element.send_keys(AI_新媒体_query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# driver.find_element_by_xpath('/html/body/div[4]/div/div[2]/div/div[1]/div[2]/dl/dd[1]/p').get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.find_element_by_xpath('//div[@class=\"search-buttons\"]/input[@class=\"btn-search\"]').click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 点击检索(点击页面显示50篇+全选)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 显示\n",
    "element = driver.find_element_by_xpath('//*[@id=\"perPageDiv\"]/div')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 50 \n",
    "element = driver.find_element_by_xpath('//*[@id=\"perPageDiv\"]/ul/li[3]')\n",
    "element.click()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 获取页面内容"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>901</td>\n",
       "      <td>Watson IoT chief: AI can broaden IoT services</td>\n",
       "      <td>Jon Gold</td>\n",
       "      <td>Network World (Online)</td>\n",
       "      <td>2019-10-29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>902</td>\n",
       "      <td>National Science Foundation; New NSF funding o...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NewsRx Health &amp; Science</td>\n",
       "      <td>2019-10-27</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>903</td>\n",
       "      <td>National Science Foundation; New NSF funding o...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NewsRx Health &amp; Science</td>\n",
       "      <td>2019-10-27</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>904</td>\n",
       "      <td>量子人工智能科学技术研究中心简介</td>\n",
       "      <td>NaN</td>\n",
       "      <td>自然杂志</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>905</td>\n",
       "      <td>论人工智能技术应用研究现状和发展前景</td>\n",
       "      <td>王嫄; 解文霞; 孔德莉; 高喜平</td>\n",
       "      <td>科技与创新</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2114.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>906</td>\n",
       "      <td>基于物联网的医疗系统在慢性阻塞性肺疾病管理中的应用现状</td>\n",
       "      <td>姜宗良;王语嫣;王海播;马利军;孙娜雅</td>\n",
       "      <td>护理研究</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>3.0</td>\n",
       "      <td>355.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>907</td>\n",
       "      <td>人工智能信息资源建设路径的多维度探析</td>\n",
       "      <td>毕晓妍</td>\n",
       "      <td>四川图书馆学报</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>83.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>908</td>\n",
       "      <td>以人工智能背景为导向的遥感科学与技术专业模式识别课程的建设与实践</td>\n",
       "      <td>陶超; 马慧云; 邹峥嵘</td>\n",
       "      <td>测绘与空间地理信息</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>2.0</td>\n",
       "      <td>238.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>909</td>\n",
       "      <td>人工智能时代应如何改变思维：系统、关联和直觉——钱旭红院士访谈</td>\n",
       "      <td>黄时进; 成素梅</td>\n",
       "      <td>哲学分析</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>273.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>910</td>\n",
       "      <td>科学选择与伦理选择的冲突：麦克尤恩《像我这样的机器》中的人工智能与脑文本</td>\n",
       "      <td>尚必武</td>\n",
       "      <td>外国文学研究</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1137.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>911</td>\n",
       "      <td>基于人工智能的物联网业务分类保障方法研究</td>\n",
       "      <td>吴玖蔚</td>\n",
       "      <td>通讯世界</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>49.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>912</td>\n",
       "      <td>道术之衡 《道德经》对人工智能发展趋势的启示</td>\n",
       "      <td>韦欣; 向宁</td>\n",
       "      <td>中国宗教</td>\n",
       "      <td>2019-10-22</td>\n",
       "      <td>1.0</td>\n",
       "      <td>237.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>913</td>\n",
       "      <td>泛在电力物联网下的抽水蓄能电站智慧管理模式思考</td>\n",
       "      <td>何铮; 张林</td>\n",
       "      <td>水电与抽水蓄能</td>\n",
       "      <td>2019-10-20</td>\n",
       "      <td>2.0</td>\n",
       "      <td>209.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>914</td>\n",
       "      <td>人工智能和大数据让城市用水管理从艺术变成科学</td>\n",
       "      <td>于怡鑫</td>\n",
       "      <td>城市管理与科技</td>\n",
       "      <td>2019-10-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>131.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>915</td>\n",
       "      <td>马克思主义哲学视野下的人工智能探析</td>\n",
       "      <td>吴迪</td>\n",
       "      <td>新西部</td>\n",
       "      <td>2019-10-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>838.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>916</td>\n",
       "      <td>自由意志、道德代理与智能代理——兼论人工智能犯罪主体资格之生成</td>\n",
       "      <td>彭文华</td>\n",
       "      <td>法学</td>\n",
       "      <td>2019-10-20</td>\n",
       "      <td>15.0</td>\n",
       "      <td>1055.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>917</td>\n",
       "      <td>人机协同的新时代：我国人工智能教育应用的现状与趋势</td>\n",
       "      <td>陈丽; 郭玉娟; 高欣峰; 谢雷; 郑勤华</td>\n",
       "      <td>开放学习研究</td>\n",
       "      <td>2019-10-20</td>\n",
       "      <td>14.0</td>\n",
       "      <td>1387.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>918</td>\n",
       "      <td>如何认识人工智能的伦理冲突?——研究回顾与展望</td>\n",
       "      <td>谢洪明; 陈亮; 杨英楠</td>\n",
       "      <td>外国经济与管理</td>\n",
       "      <td>2019-10-20</td>\n",
       "      <td>16.0</td>\n",
       "      <td>2537.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>919</td>\n",
       "      <td>泛在电力物联网建设路线图、时间表出台</td>\n",
       "      <td>NaN</td>\n",
       "      <td>电力安全技术</td>\n",
       "      <td>2019-10-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>920</td>\n",
       "      <td>人工智能和物联网在电商领域的应用</td>\n",
       "      <td>郝京杰</td>\n",
       "      <td>中国新通信</td>\n",
       "      <td>2019-10-20</td>\n",
       "      <td>1.0</td>\n",
       "      <td>400.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>921</td>\n",
       "      <td>物联网形势下的5G技术研究</td>\n",
       "      <td>汪超; 郑成城; 穆健康</td>\n",
       "      <td>信息系统工程</td>\n",
       "      <td>2019-10-20</td>\n",
       "      <td>1.0</td>\n",
       "      <td>136.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>922</td>\n",
       "      <td>大数据背景下人工智能在计算机网络技术中的应用</td>\n",
       "      <td>刘海梅</td>\n",
       "      <td>传播力研究</td>\n",
       "      <td>2019-10-20</td>\n",
       "      <td>3.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>923</td>\n",
       "      <td>人工智能在财务会计中的应用研究</td>\n",
       "      <td>侯丹; 林明珠</td>\n",
       "      <td>营销界</td>\n",
       "      <td>2019-10-18</td>\n",
       "      <td>2.0</td>\n",
       "      <td>275.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>924</td>\n",
       "      <td>Scientific Foundation of Real-Time Input-Outpu...</td>\n",
       "      <td>Ning Kang</td>\n",
       "      <td>American Journal of Industrial and Business Ma...</td>\n",
       "      <td>2019-10-17</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>925</td>\n",
       "      <td>泛在电力物联网数据挖掘体系建设综述及数据驱动认知框架探究</td>\n",
       "      <td>贺兴; 艾芊; 邱才明; 张东霞</td>\n",
       "      <td>电器与能效管理技术</td>\n",
       "      <td>2019-10-15</td>\n",
       "      <td>5.0</td>\n",
       "      <td>385.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>926</td>\n",
       "      <td>基于人工智能技术背景下计算机网络应用研究</td>\n",
       "      <td>宋钰</td>\n",
       "      <td>海峡科技与产业</td>\n",
       "      <td>2019-10-15</td>\n",
       "      <td>1.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>927</td>\n",
       "      <td>介尺度中的复杂性——人工智能发展中的共性挑战</td>\n",
       "      <td>郭力; 邬俊; 李静海</td>\n",
       "      <td>Engineering</td>\n",
       "      <td>2019-10-15</td>\n",
       "      <td>2.0</td>\n",
       "      <td>149.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>928</td>\n",
       "      <td>人工智能与数字经济广东省实验室（深圳）</td>\n",
       "      <td>李岱素; 刘启强</td>\n",
       "      <td>广东科技</td>\n",
       "      <td>2019-10-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>114.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>929</td>\n",
       "      <td>人工智能视域下的社会学“费孝通悖论”求解</td>\n",
       "      <td>石英</td>\n",
       "      <td>人文杂志</td>\n",
       "      <td>2019-10-15</td>\n",
       "      <td>2.0</td>\n",
       "      <td>381.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>930</td>\n",
       "      <td>物联网技术应用于智能机器人系统的探索与实践</td>\n",
       "      <td>钟元权</td>\n",
       "      <td>山东农业工程学院学报</td>\n",
       "      <td>2019-10-15</td>\n",
       "      <td>4.0</td>\n",
       "      <td>142.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>931</td>\n",
       "      <td>人工智能更接近边缘：针对物联网的数据洪流强化生态系统</td>\n",
       "      <td>NaN</td>\n",
       "      <td>世界电子元器件</td>\n",
       "      <td>2019-10-15</td>\n",
       "      <td>NaN</td>\n",
       "      <td>75.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>932</td>\n",
       "      <td>人工智能和数据挖掘在人机工程PHM中的应用</td>\n",
       "      <td>王伟</td>\n",
       "      <td>华北科技学院学报</td>\n",
       "      <td>2019-10-15</td>\n",
       "      <td>2.0</td>\n",
       "      <td>198.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>933</td>\n",
       "      <td>ビル・施設分野におけるIoTやAIの活用</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The Journal of the Institute of Electrical Ins...</td>\n",
       "      <td>2019-10-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>934</td>\n",
       "      <td>電力産業用IoT及びAIプラットフォーム</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The Journal of the Institute of Electrical Ins...</td>\n",
       "      <td>2019-10-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>935</td>\n",
       "      <td>日本の製造業のIoT化の技術動向</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The Journal of the Institute of Electrical Ins...</td>\n",
       "      <td>2019-10-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>936</td>\n",
       "      <td>中国支部だより ひろしまサンドボックス AI/IoTプラットフォーム実証事業</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The Journal of the Institute of Electrical Ins...</td>\n",
       "      <td>2019-10-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>937</td>\n",
       "      <td>関西支部だより AI・IoTの概要と活用事例について</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The Journal of the Institute of Electrical Ins...</td>\n",
       "      <td>2019-10-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>938</td>\n",
       "      <td>優秀開発賞 IoT・AIとBIMを活用したスマートビルマネジメントシステム</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The Journal of the Institute of Electrical Ins...</td>\n",
       "      <td>2019-10-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>939</td>\n",
       "      <td>Enercamp; Enercamp With The Title of Amazon St...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Journal of Engineering</td>\n",
       "      <td>2019-10-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>940</td>\n",
       "      <td>Advanced bioscience and AI: debugging the futu...</td>\n",
       "      <td>David R. Lawrence</td>\n",
       "      <td>Emerging Topics in Life Sciences</td>\n",
       "      <td>2019-10-11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>941</td>\n",
       "      <td>人工智能经济对知识产权法律保护的挑战</td>\n",
       "      <td>王国安</td>\n",
       "      <td>现代营销(经营版)</td>\n",
       "      <td>2019-10-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>537.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>942</td>\n",
       "      <td>神经科学引入AI得到的启示</td>\n",
       "      <td>乔琦</td>\n",
       "      <td>世界科学</td>\n",
       "      <td>2019-10-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>74.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>943</td>\n",
       "      <td>地方本科院校人工智能专业建设研究与实践</td>\n",
       "      <td>刘光洁; 张嵛; 姚亦飞</td>\n",
       "      <td>计算机教育</td>\n",
       "      <td>2019-10-10</td>\n",
       "      <td>4.0</td>\n",
       "      <td>229.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>944</td>\n",
       "      <td>北京大学人工智能课程教学改革与实践</td>\n",
       "      <td>罗定生;李文新;邓志鸿;童云海;刘家瑛</td>\n",
       "      <td>计算机教育</td>\n",
       "      <td>2019-10-10</td>\n",
       "      <td>7.0</td>\n",
       "      <td>690.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>945</td>\n",
       "      <td>Ai Driven Advanced Internet Of Things (Iotx2):...</td>\n",
       "      <td>Ergen Onur;Belcastro Kristen D</td>\n",
       "      <td>Anatolian journal of cardiology</td>\n",
       "      <td>2019-10-10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>946</td>\n",
       "      <td>新时代教育理论创新的动因与路向探究</td>\n",
       "      <td>赵晋; 蔡冉冉; 张建军</td>\n",
       "      <td>中国电化教育</td>\n",
       "      <td>2019-10-09 13:14</td>\n",
       "      <td>2.0</td>\n",
       "      <td>790.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>947</td>\n",
       "      <td>Raging robots, hapless humans: the AI dystopia.</td>\n",
       "      <td>Leslie David</td>\n",
       "      <td>Nature</td>\n",
       "      <td>2019-10-09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>948</td>\n",
       "      <td>安徽合肥综合性国家科学中心启动人工智能研究院建设</td>\n",
       "      <td>NaN</td>\n",
       "      <td>华东科技</td>\n",
       "      <td>2019-10-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>69.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>949</td>\n",
       "      <td>“智适应”理论与实践——第三届人工智能和自适应教育国际大会综述</td>\n",
       "      <td>刘凯; 王韶; 隆舟; 王涛</td>\n",
       "      <td>开放教育研究</td>\n",
       "      <td>2019-10-05</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1176.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>950</td>\n",
       "      <td>基于泛在电力物联网的辅助决策系统的分析与设计</td>\n",
       "      <td>房萌;焦之明;鲁南;庞怀江;纪洪伟</td>\n",
       "      <td>科技经济导刊</td>\n",
       "      <td>2019-10-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>145.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Unnamed: 0                                                 篇名  \\\n",
       "0          901      Watson IoT chief: AI can broaden IoT services   \n",
       "1          902  National Science Foundation; New NSF funding o...   \n",
       "2          903  National Science Foundation; New NSF funding o...   \n",
       "3          904                                   量子人工智能科学技术研究中心简介   \n",
       "4          905                                 论人工智能技术应用研究现状和发展前景   \n",
       "5          906                        基于物联网的医疗系统在慢性阻塞性肺疾病管理中的应用现状   \n",
       "6          907                                 人工智能信息资源建设路径的多维度探析   \n",
       "7          908                   以人工智能背景为导向的遥感科学与技术专业模式识别课程的建设与实践   \n",
       "8          909                    人工智能时代应如何改变思维：系统、关联和直觉——钱旭红院士访谈   \n",
       "9          910               科学选择与伦理选择的冲突：麦克尤恩《像我这样的机器》中的人工智能与脑文本   \n",
       "10         911                               基于人工智能的物联网业务分类保障方法研究   \n",
       "11         912                             道术之衡 《道德经》对人工智能发展趋势的启示   \n",
       "12         913                            泛在电力物联网下的抽水蓄能电站智慧管理模式思考   \n",
       "13         914                             人工智能和大数据让城市用水管理从艺术变成科学   \n",
       "14         915                                  马克思主义哲学视野下的人工智能探析   \n",
       "15         916                    自由意志、道德代理与智能代理——兼论人工智能犯罪主体资格之生成   \n",
       "16         917                          人机协同的新时代：我国人工智能教育应用的现状与趋势   \n",
       "17         918                            如何认识人工智能的伦理冲突?——研究回顾与展望   \n",
       "18         919                                 泛在电力物联网建设路线图、时间表出台   \n",
       "19         920                                   人工智能和物联网在电商领域的应用   \n",
       "20         921                                      物联网形势下的5G技术研究   \n",
       "21         922                             大数据背景下人工智能在计算机网络技术中的应用   \n",
       "22         923                                    人工智能在财务会计中的应用研究   \n",
       "23         924  Scientific Foundation of Real-Time Input-Outpu...   \n",
       "24         925                       泛在电力物联网数据挖掘体系建设综述及数据驱动认知框架探究   \n",
       "25         926                               基于人工智能技术背景下计算机网络应用研究   \n",
       "26         927                             介尺度中的复杂性——人工智能发展中的共性挑战   \n",
       "27         928                                人工智能与数字经济广东省实验室（深圳）   \n",
       "28         929                               人工智能视域下的社会学“费孝通悖论”求解   \n",
       "29         930                              物联网技术应用于智能机器人系统的探索与实践   \n",
       "30         931                         人工智能更接近边缘：针对物联网的数据洪流强化生态系统   \n",
       "31         932                              人工智能和数据挖掘在人机工程PHM中的应用   \n",
       "32         933                               ビル・施設分野におけるIoTやAIの活用   \n",
       "33         934                               電力産業用IoT及びAIプラットフォーム   \n",
       "34         935                                   日本の製造業のIoT化の技術動向   \n",
       "35         936             中国支部だより ひろしまサンドボックス AI/IoTプラットフォーム実証事業   \n",
       "36         937                         関西支部だより AI・IoTの概要と活用事例について   \n",
       "37         938              優秀開発賞 IoT・AIとBIMを活用したスマートビルマネジメントシステム   \n",
       "38         939  Enercamp; Enercamp With The Title of Amazon St...   \n",
       "39         940  Advanced bioscience and AI: debugging the futu...   \n",
       "40         941                                 人工智能经济对知识产权法律保护的挑战   \n",
       "41         942                                      神经科学引入AI得到的启示   \n",
       "42         943                                地方本科院校人工智能专业建设研究与实践   \n",
       "43         944                                  北京大学人工智能课程教学改革与实践   \n",
       "44         945  Ai Driven Advanced Internet Of Things (Iotx2):...   \n",
       "45         946                                  新时代教育理论创新的动因与路向探究   \n",
       "46         947    Raging robots, hapless humans: the AI dystopia.   \n",
       "47         948                           安徽合肥综合性国家科学中心启动人工智能研究院建设   \n",
       "48         949                    “智适应”理论与实践——第三届人工智能和自适应教育国际大会综述   \n",
       "49         950                             基于泛在电力物联网的辅助决策系统的分析与设计   \n",
       "\n",
       "                                作者  \\\n",
       "0                         Jon Gold   \n",
       "1                              NaN   \n",
       "2                              NaN   \n",
       "3                              NaN   \n",
       "4                王嫄; 解文霞; 孔德莉; 高喜平   \n",
       "5              姜宗良;王语嫣;王海播;马利军;孙娜雅   \n",
       "6                              毕晓妍   \n",
       "7                     陶超; 马慧云; 邹峥嵘   \n",
       "8                         黄时进; 成素梅   \n",
       "9                              尚必武   \n",
       "10                             吴玖蔚   \n",
       "11                          韦欣; 向宁   \n",
       "12                          何铮; 张林   \n",
       "13                             于怡鑫   \n",
       "14                              吴迪   \n",
       "15                             彭文华   \n",
       "16           陈丽; 郭玉娟; 高欣峰; 谢雷; 郑勤华   \n",
       "17                    谢洪明; 陈亮; 杨英楠   \n",
       "18                             NaN   \n",
       "19                             郝京杰   \n",
       "20                    汪超; 郑成城; 穆健康   \n",
       "21                             刘海梅   \n",
       "22                         侯丹; 林明珠   \n",
       "23                       Ning Kang   \n",
       "24                贺兴; 艾芊; 邱才明; 张东霞   \n",
       "25                              宋钰   \n",
       "26                     郭力; 邬俊; 李静海   \n",
       "27                        李岱素; 刘启强   \n",
       "28                              石英   \n",
       "29                             钟元权   \n",
       "30                             NaN   \n",
       "31                              王伟   \n",
       "32                             NaN   \n",
       "33                             NaN   \n",
       "34                             NaN   \n",
       "35                             NaN   \n",
       "36                             NaN   \n",
       "37                             NaN   \n",
       "38                             NaN   \n",
       "39               David R. Lawrence   \n",
       "40                             王国安   \n",
       "41                              乔琦   \n",
       "42                    刘光洁; 张嵛; 姚亦飞   \n",
       "43             罗定生;李文新;邓志鸿;童云海;刘家瑛   \n",
       "44  Ergen Onur;Belcastro Kristen D   \n",
       "45                    赵晋; 蔡冉冉; 张建军   \n",
       "46                    Leslie David   \n",
       "47                             NaN   \n",
       "48                  刘凯; 王韶; 隆舟; 王涛   \n",
       "49               房萌;焦之明;鲁南;庞怀江;纪洪伟   \n",
       "\n",
       "                                                   刊名              发表时间    被引  \\\n",
       "0                              Network World (Online)        2019-10-29   NaN   \n",
       "1                             NewsRx Health & Science        2019-10-27   NaN   \n",
       "2                             NewsRx Health & Science        2019-10-27   NaN   \n",
       "3                                                自然杂志        2019-10-25   NaN   \n",
       "4                                               科技与创新        2019-10-25   8.0   \n",
       "5                                                护理研究        2019-10-25   3.0   \n",
       "6                                             四川图书馆学报        2019-10-25   NaN   \n",
       "7                                           测绘与空间地理信息        2019-10-25   2.0   \n",
       "8                                                哲学分析        2019-10-25   NaN   \n",
       "9                                              外国文学研究        2019-10-25   2.0   \n",
       "10                                               通讯世界        2019-10-25   NaN   \n",
       "11                                               中国宗教        2019-10-22   1.0   \n",
       "12                                            水电与抽水蓄能        2019-10-20   2.0   \n",
       "13                                            城市管理与科技        2019-10-20   NaN   \n",
       "14                                                新西部        2019-10-20   NaN   \n",
       "15                                                 法学        2019-10-20  15.0   \n",
       "16                                             开放学习研究        2019-10-20  14.0   \n",
       "17                                            外国经济与管理        2019-10-20  16.0   \n",
       "18                                             电力安全技术        2019-10-20   NaN   \n",
       "19                                              中国新通信        2019-10-20   1.0   \n",
       "20                                             信息系统工程        2019-10-20   1.0   \n",
       "21                                              传播力研究        2019-10-20   3.0   \n",
       "22                                                营销界        2019-10-18   2.0   \n",
       "23  American Journal of Industrial and Business Ma...        2019-10-17   NaN   \n",
       "24                                          电器与能效管理技术        2019-10-15   5.0   \n",
       "25                                            海峡科技与产业        2019-10-15   1.0   \n",
       "26                                        Engineering        2019-10-15   2.0   \n",
       "27                                               广东科技        2019-10-15   NaN   \n",
       "28                                               人文杂志        2019-10-15   2.0   \n",
       "29                                         山东农业工程学院学报        2019-10-15   4.0   \n",
       "30                                            世界电子元器件        2019-10-15   NaN   \n",
       "31                                           华北科技学院学报        2019-10-15   2.0   \n",
       "32  The Journal of the Institute of Electrical Ins...        2019-10-14   NaN   \n",
       "33  The Journal of the Institute of Electrical Ins...        2019-10-14   NaN   \n",
       "34  The Journal of the Institute of Electrical Ins...        2019-10-14   NaN   \n",
       "35  The Journal of the Institute of Electrical Ins...        2019-10-14   NaN   \n",
       "36  The Journal of the Institute of Electrical Ins...        2019-10-14   NaN   \n",
       "37  The Journal of the Institute of Electrical Ins...        2019-10-14   NaN   \n",
       "38                             Journal of Engineering        2019-10-14   NaN   \n",
       "39                   Emerging Topics in Life Sciences        2019-10-11   NaN   \n",
       "40                                          现代营销(经营版)        2019-10-10   NaN   \n",
       "41                                               世界科学        2019-10-10   NaN   \n",
       "42                                              计算机教育        2019-10-10   4.0   \n",
       "43                                              计算机教育        2019-10-10   7.0   \n",
       "44                    Anatolian journal of cardiology        2019-10-10   NaN   \n",
       "45                                             中国电化教育  2019-10-09 13:14   2.0   \n",
       "46                                             Nature        2019-10-09   NaN   \n",
       "47                                               华东科技        2019-10-05   NaN   \n",
       "48                                             开放教育研究        2019-10-05   5.0   \n",
       "49                                             科技经济导刊        2019-10-05   NaN   \n",
       "\n",
       "        下载   操作  \n",
       "0      NaN  NaN  \n",
       "1      NaN  NaN  \n",
       "2      NaN  NaN  \n",
       "3     52.0   下载  \n",
       "4   2114.0   下载  \n",
       "5    355.0   下载  \n",
       "6     83.0   下载  \n",
       "7    238.0   下载  \n",
       "8    273.0   下载  \n",
       "9   1137.0   下载  \n",
       "10    49.0   下载  \n",
       "11   237.0   下载  \n",
       "12   209.0   下载  \n",
       "13   131.0   下载  \n",
       "14   838.0   下载  \n",
       "15  1055.0   下载  \n",
       "16  1387.0   下载  \n",
       "17  2537.0   下载  \n",
       "18    32.0   下载  \n",
       "19   400.0   下载  \n",
       "20   136.0   下载  \n",
       "21    70.0   下载  \n",
       "22   275.0   下载  \n",
       "23     NaN  NaN  \n",
       "24   385.0   下载  \n",
       "25    12.0   下载  \n",
       "26   149.0   下载  \n",
       "27   114.0   下载  \n",
       "28   381.0   下载  \n",
       "29   142.0   下载  \n",
       "30    75.0   下载  \n",
       "31   198.0   下载  \n",
       "32     NaN  NaN  \n",
       "33     NaN  NaN  \n",
       "34     NaN  NaN  \n",
       "35     NaN  NaN  \n",
       "36     NaN  NaN  \n",
       "37     NaN  NaN  \n",
       "38     NaN  NaN  \n",
       "39     NaN  NaN  \n",
       "40   537.0   下载  \n",
       "41    74.0   下载  \n",
       "42   229.0   下载  \n",
       "43   690.0   下载  \n",
       "44     NaN  NaN  \n",
       "45   790.0   下载  \n",
       "46     NaN  NaN  \n",
       "47    69.0   下载  \n",
       "48  1176.0   下载  \n",
       "49   145.0   下载  "
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "element = driver.find_element_by_id('gridTable')\n",
    "page_html = element.get_attribute('innerHTML')\n",
    "data = pd.read_html(page_html)[0]\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 翻页"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'下一页'"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 翻页\n",
    "element = driver.find_element_by_id('PageNext')\n",
    "element.get_attribute('innerHTML')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1/51'"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 跳转上限\n",
    "element = driver.find_element_by_xpath('//span[@class=\"countPageMark\"]')\n",
    "page_str = element.get_attribute('innerHTML')\n",
    "page_str "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['1', '51']"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "page_int = page_str.split('/')\n",
    "page_int"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51]\n"
     ]
    }
   ],
   "source": [
    "pages = list(range(1,int(page_int[1])+1))\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 取前18页\n",
    "pages = list(range(1,19))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "表格_html = dict()\n",
    "main_content =\"\"\n",
    "element = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_pages (pages):\n",
    "    for p in pages:\n",
    "        print (p,end='\\t')\n",
    "        跳转 = driver.find_element_by_id('PageNext')\n",
    "        跳转.click()\n",
    "        time.sleep(8+1*random())\n",
    "        # 获取含有页面主要数据的表格\n",
    "        element = driver.find_element_by_id('gridTable')\n",
    "        main_content = element.get_attribute('innerHTML')\n",
    "        表格_html[p] = main_content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t13\t14\t15\t16\t17\t18\t"
     ]
    }
   ],
   "source": [
    "process_pages(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>html_snippets</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>\\n&lt;div class=\"toolbar\"&gt;&lt;div id=\"countPageDiv\" ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                        html_snippets\n",
       "1   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "2   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "3   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "4   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "5   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "6   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "7   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "8   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "9   \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "10  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "11  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "12  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "13  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "14  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "15  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "16  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "17  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ...\n",
       "18  \\n<div class=\"toolbar\"><div id=\"countPageDiv\" ..."
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame([表格_html]).T\n",
    "df.columns = [\"html_snippets\"]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "网站 = \"中国知网\"\n",
    "fn = { \"output\" : { \"htm_snippets\": \"data_raw_src/知网_htm_snippets_{网站}.tsv\"}\n",
    "     }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 保存页面内容的csv文件\n",
    "filename = fn [\"output\"] [\"htm_snippets\"] \n",
    "df.to_csv(filename.format(网站=网站), sep=\"\\t\", encoding=\"utf8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "l_df = []\n",
    "for p in pages:\n",
    "    表格 = pd.read_html(表格_html[p])[0]\n",
    "    l_df.append(表格)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>901</td>\n",
       "      <td>Watson IoT chief: AI can broaden IoT services</td>\n",
       "      <td>Jon Gold</td>\n",
       "      <td>Network World (Online)</td>\n",
       "      <td>2019-10-29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>902</td>\n",
       "      <td>National Science Foundation; New NSF funding o...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NewsRx Health &amp; Science</td>\n",
       "      <td>2019-10-27</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>903</td>\n",
       "      <td>National Science Foundation; New NSF funding o...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NewsRx Health &amp; Science</td>\n",
       "      <td>2019-10-27</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>904</td>\n",
       "      <td>量子人工智能科学技术研究中心简介</td>\n",
       "      <td>NaN</td>\n",
       "      <td>自然杂志</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>905</td>\n",
       "      <td>论人工智能技术应用研究现状和发展前景</td>\n",
       "      <td>王嫄; 解文霞; 孔德莉; 高喜平</td>\n",
       "      <td>科技与创新</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2114.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>895</th>\n",
       "      <td>946</td>\n",
       "      <td>新时代教育理论创新的动因与路向探究</td>\n",
       "      <td>赵晋; 蔡冉冉; 张建军</td>\n",
       "      <td>中国电化教育</td>\n",
       "      <td>2019-10-09 13:14</td>\n",
       "      <td>2.0</td>\n",
       "      <td>790.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>896</th>\n",
       "      <td>947</td>\n",
       "      <td>Raging robots, hapless humans: the AI dystopia.</td>\n",
       "      <td>Leslie David</td>\n",
       "      <td>Nature</td>\n",
       "      <td>2019-10-09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>897</th>\n",
       "      <td>948</td>\n",
       "      <td>安徽合肥综合性国家科学中心启动人工智能研究院建设</td>\n",
       "      <td>NaN</td>\n",
       "      <td>华东科技</td>\n",
       "      <td>2019-10-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>69.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>898</th>\n",
       "      <td>949</td>\n",
       "      <td>“智适应”理论与实践——第三届人工智能和自适应教育国际大会综述</td>\n",
       "      <td>刘凯; 王韶; 隆舟; 王涛</td>\n",
       "      <td>开放教育研究</td>\n",
       "      <td>2019-10-05</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1176.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>899</th>\n",
       "      <td>950</td>\n",
       "      <td>基于泛在电力物联网的辅助决策系统的分析与设计</td>\n",
       "      <td>房萌;焦之明;鲁南;庞怀江;纪洪伟</td>\n",
       "      <td>科技经济导刊</td>\n",
       "      <td>2019-10-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>145.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>950 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0                                                 篇名  \\\n",
       "0           901      Watson IoT chief: AI can broaden IoT services   \n",
       "1           902  National Science Foundation; New NSF funding o...   \n",
       "2           903  National Science Foundation; New NSF funding o...   \n",
       "3           904                                   量子人工智能科学技术研究中心简介   \n",
       "4           905                                 论人工智能技术应用研究现状和发展前景   \n",
       "..          ...                                                ...   \n",
       "895         946                                  新时代教育理论创新的动因与路向探究   \n",
       "896         947    Raging robots, hapless humans: the AI dystopia.   \n",
       "897         948                           安徽合肥综合性国家科学中心启动人工智能研究院建设   \n",
       "898         949                    “智适应”理论与实践——第三届人工智能和自适应教育国际大会综述   \n",
       "899         950                             基于泛在电力物联网的辅助决策系统的分析与设计   \n",
       "\n",
       "                    作者                       刊名              发表时间   被引  \\\n",
       "0             Jon Gold   Network World (Online)        2019-10-29  NaN   \n",
       "1                  NaN  NewsRx Health & Science        2019-10-27  NaN   \n",
       "2                  NaN  NewsRx Health & Science        2019-10-27  NaN   \n",
       "3                  NaN                     自然杂志        2019-10-25  NaN   \n",
       "4    王嫄; 解文霞; 孔德莉; 高喜平                    科技与创新        2019-10-25  8.0   \n",
       "..                 ...                      ...               ...  ...   \n",
       "895       赵晋; 蔡冉冉; 张建军                   中国电化教育  2019-10-09 13:14  2.0   \n",
       "896       Leslie David                   Nature        2019-10-09  NaN   \n",
       "897                NaN                     华东科技        2019-10-05  NaN   \n",
       "898     刘凯; 王韶; 隆舟; 王涛                   开放教育研究        2019-10-05  5.0   \n",
       "899  房萌;焦之明;鲁南;庞怀江;纪洪伟                   科技经济导刊        2019-10-05  NaN   \n",
       "\n",
       "         下载   操作  \n",
       "0       NaN  NaN  \n",
       "1       NaN  NaN  \n",
       "2       NaN  NaN  \n",
       "3      52.0   下载  \n",
       "4    2114.0   下载  \n",
       "..      ...  ...  \n",
       "895   790.0   下载  \n",
       "896     NaN  NaN  \n",
       "897    69.0   下载  \n",
       "898  1176.0   下载  \n",
       "899   145.0   下载  \n",
       "\n",
       "[950 rows x 8 columns]"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_url_out = pd.concat(l_df).reset_index(drop=True)\n",
    "df_总表 = data.append(df_url_out)\n",
    "df_总表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>篇名</th>\n",
       "      <th>作者</th>\n",
       "      <th>刊名</th>\n",
       "      <th>发表时间</th>\n",
       "      <th>被引</th>\n",
       "      <th>下载</th>\n",
       "      <th>操作</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>901</td>\n",
       "      <td>Watson IoT chief: AI can broaden IoT services</td>\n",
       "      <td>Jon Gold</td>\n",
       "      <td>Network World (Online)</td>\n",
       "      <td>2019-10-29</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>902</td>\n",
       "      <td>National Science Foundation; New NSF funding o...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NewsRx Health &amp; Science</td>\n",
       "      <td>2019-10-27</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>903</td>\n",
       "      <td>National Science Foundation; New NSF funding o...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NewsRx Health &amp; Science</td>\n",
       "      <td>2019-10-27</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>904</td>\n",
       "      <td>量子人工智能科学技术研究中心简介</td>\n",
       "      <td>NaN</td>\n",
       "      <td>自然杂志</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>52.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>905</td>\n",
       "      <td>论人工智能技术应用研究现状和发展前景</td>\n",
       "      <td>王嫄; 解文霞; 孔德莉; 高喜平</td>\n",
       "      <td>科技与创新</td>\n",
       "      <td>2019-10-25</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2114.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>895</th>\n",
       "      <td>946</td>\n",
       "      <td>新时代教育理论创新的动因与路向探究</td>\n",
       "      <td>赵晋; 蔡冉冉; 张建军</td>\n",
       "      <td>中国电化教育</td>\n",
       "      <td>2019-10-09 13:14</td>\n",
       "      <td>2.0</td>\n",
       "      <td>790.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>896</th>\n",
       "      <td>947</td>\n",
       "      <td>Raging robots, hapless humans: the AI dystopia.</td>\n",
       "      <td>Leslie David</td>\n",
       "      <td>Nature</td>\n",
       "      <td>2019-10-09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>897</th>\n",
       "      <td>948</td>\n",
       "      <td>安徽合肥综合性国家科学中心启动人工智能研究院建设</td>\n",
       "      <td>NaN</td>\n",
       "      <td>华东科技</td>\n",
       "      <td>2019-10-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>69.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>898</th>\n",
       "      <td>949</td>\n",
       "      <td>“智适应”理论与实践——第三届人工智能和自适应教育国际大会综述</td>\n",
       "      <td>刘凯; 王韶; 隆舟; 王涛</td>\n",
       "      <td>开放教育研究</td>\n",
       "      <td>2019-10-05</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1176.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>899</th>\n",
       "      <td>950</td>\n",
       "      <td>基于泛在电力物联网的辅助决策系统的分析与设计</td>\n",
       "      <td>房萌;焦之明;鲁南;庞怀江;纪洪伟</td>\n",
       "      <td>科技经济导刊</td>\n",
       "      <td>2019-10-05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>145.0</td>\n",
       "      <td>下载</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>950 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0                                                 篇名  \\\n",
       "0           901      Watson IoT chief: AI can broaden IoT services   \n",
       "1           902  National Science Foundation; New NSF funding o...   \n",
       "2           903  National Science Foundation; New NSF funding o...   \n",
       "3           904                                   量子人工智能科学技术研究中心简介   \n",
       "4           905                                 论人工智能技术应用研究现状和发展前景   \n",
       "..          ...                                                ...   \n",
       "895         946                                  新时代教育理论创新的动因与路向探究   \n",
       "896         947    Raging robots, hapless humans: the AI dystopia.   \n",
       "897         948                           安徽合肥综合性国家科学中心启动人工智能研究院建设   \n",
       "898         949                    “智适应”理论与实践——第三届人工智能和自适应教育国际大会综述   \n",
       "899         950                             基于泛在电力物联网的辅助决策系统的分析与设计   \n",
       "\n",
       "                    作者                       刊名              发表时间   被引  \\\n",
       "0             Jon Gold   Network World (Online)        2019-10-29  NaN   \n",
       "1                  NaN  NewsRx Health & Science        2019-10-27  NaN   \n",
       "2                  NaN  NewsRx Health & Science        2019-10-27  NaN   \n",
       "3                  NaN                     自然杂志        2019-10-25  NaN   \n",
       "4    王嫄; 解文霞; 孔德莉; 高喜平                    科技与创新        2019-10-25  8.0   \n",
       "..                 ...                      ...               ...  ...   \n",
       "895       赵晋; 蔡冉冉; 张建军                   中国电化教育  2019-10-09 13:14  2.0   \n",
       "896       Leslie David                   Nature        2019-10-09  NaN   \n",
       "897                NaN                     华东科技        2019-10-05  NaN   \n",
       "898     刘凯; 王韶; 隆舟; 王涛                   开放教育研究        2019-10-05  5.0   \n",
       "899  房萌;焦之明;鲁南;庞怀江;纪洪伟                   科技经济导刊        2019-10-05  NaN   \n",
       "\n",
       "         下载   操作  \n",
       "0       NaN  NaN  \n",
       "1       NaN  NaN  \n",
       "2       NaN  NaN  \n",
       "3      52.0   下载  \n",
       "4    2114.0   下载  \n",
       "..      ...  ...  \n",
       "895   790.0   下载  \n",
       "896     NaN  NaN  \n",
       "897    69.0   下载  \n",
       "898  1176.0   下载  \n",
       "899   145.0   下载  \n",
       "\n",
       "[950 rows x 8 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Write the combined results table to a local Excel workbook, then preview it.\n",
    "with pd.ExcelWriter('知网文章数据.xlsx', mode='w', engine=\"openpyxl\") as writer:\n",
    "    df_总表.to_excel(writer, sheet_name=\"知网\")\n",
    "display(df_总表)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 下载文件以及原文"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n"
     ]
    }
   ],
   "source": [
    "# Export the RefWorks file (.txt) and download the articles.\n",
    "# A single select-all cannot exceed 500 articles, so the work is done in two rounds.\n",
    "\n",
    "pages = list(range(1,11))\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Return to the first page of results\n",
    "driver.find_element('id', 'total').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clear the current selection\n",
    "driver.find_element('xpath', '//*[@id=\"gridTable\"]/div[1]/div[2]/div[1]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select all 50 results on the current page, then advance to the next page\n",
    "def process_choose(pages, select_pause=8, turn_pause=5):\n",
    "    \"\"\"Tick the select-all box and click 'next page' once per entry in ``pages``.\n",
    "\n",
    "    The values in ``pages`` are only printed as progress labels; the browser is\n",
    "    not navigated to them. Selection starts from whichever results page the\n",
    "    global ``driver`` is currently showing, and 'next page' is clicked after\n",
    "    every entry, including the last one.\n",
    "\n",
    "    Args:\n",
    "        pages: Iterable of labels, one per page to select.\n",
    "        select_pause: Base seconds to wait after ticking select-all; up to one\n",
    "            extra second of random jitter is added.\n",
    "        turn_pause: Seconds to wait after clicking 'next page'.\n",
    "    \"\"\"\n",
    "    for p in pages:\n",
    "        print(p, end='\\t')\n",
    "        # Locator strategies are passed as plain strings ('id' is the value of\n",
    "        # selenium's By.ID) because the find_element_by_* helpers were removed\n",
    "        # in Selenium 4.3.\n",
    "        driver.find_element('id', 'selectCheckAll1').click()\n",
    "        time.sleep(select_pause + random())\n",
    "        driver.find_element('id', 'PageNext').click()\n",
    "        time.sleep(turn_pause)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t"
     ]
    }
   ],
   "source": [
    "# Run the select-all-and-advance loop once for every entry in `pages`.\n",
    "process_choose(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "# An error occurred (not a captcha), so select the articles on the last two pages by hand\n",
    "driver.find_element('id', 'selectCheckAll1').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Go to the next results page\n",
    "driver.find_element('id', 'PageNext').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tick the select-all box on the current page\n",
    "driver.find_element('id', 'selectCheckAll1').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the 'Export & Analysis' menu\n",
    "driver.find_element('xpath', '//i[@class=\"icon-d\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the 'Export citations' submenu\n",
    "driver.find_element('xpath', '//i[@class=\"icon-r\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the RefWorks export format\n",
    "driver.find_element('xpath', '//a[@exporttype=\"Refworks\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-32BA9F5F2308C4D6CDAD4F08EB1352E1',\n",
       " 'CDwindow-2D1C5D5C89222381E9EF876BD15997BB',\n",
       " 'CDwindow-091957E35B1953C9B06C82213A72C6E0']"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the handles of all open browser windows\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-84-520070efe65b>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[2])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the third window handle (the export page opened by the previous steps)\n",
    "driver.switch_to.window(driver.window_handles[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the .txt file\n",
    "driver.find_element('xpath', '//i[@class=\"icon icon-export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-86-0188c2a7ff70>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Switch back to the second window handle (the search-results page)\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click 'Bulk download'\n",
    "driver.find_element('xpath', '//li[@class=\"bulkdownload export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-88-1f3bb34cc9cb>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[3])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the fourth window handle (the bulk-download page)\n",
    "driver.switch_to.window(driver.window_handles[3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the selected articles (500 items)\n",
    "driver.find_element('id', 'btn-download-all').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-90-0188c2a7ff70>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Switch back to the second window handle (the search-results page)\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clear the current selection\n",
    "driver.find_element('xpath', '//*[@id=\"gridTable\"]/div[1]/div[2]/div[1]/a').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Go to the next results page\n",
    "driver.find_element('id', 'PageNext').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[10, 11, 12, 13, 14, 15, 16, 17, 18]\n"
     ]
    }
   ],
   "source": [
    "# Second download round\n",
    "pages = list(range(10,19))\n",
    "print(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\t11\t12\t13\t14\t15\t16\t17\t18\t"
     ]
    }
   ],
   "source": [
    "# Run the select-all-and-advance loop once for every entry in `pages`.\n",
    "process_choose(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "# An error occurred (not a captcha), so select the articles on the last page by hand\n",
    "driver.find_element('id', 'selectCheckAll1').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the 'Export & Analysis' menu\n",
    "driver.find_element('xpath', '//i[@class=\"icon-d\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the 'Export citations' submenu\n",
    "driver.find_element('xpath', '//i[@class=\"icon-r\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click the RefWorks export format\n",
    "driver.find_element('xpath', '//a[@exporttype=\"Refworks\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-32BA9F5F2308C4D6CDAD4F08EB1352E1',\n",
       " 'CDwindow-2D1C5D5C89222381E9EF876BD15997BB',\n",
       " 'CDwindow-091957E35B1953C9B06C82213A72C6E0',\n",
       " 'CDwindow-4CE932109B6C7E45DD679EAE2C56E3DF',\n",
       " 'CDwindow-2C88904BE492F6F9F8EAFEFB20552A92']"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the handles of all open browser windows\n",
    "driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-100-ecda85629064>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[4])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the fifth window handle (the export page opened by the previous steps)\n",
    "driver.switch_to.window(driver.window_handles[4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the .txt file\n",
    "driver.find_element('xpath', '//i[@class=\"icon icon-export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-102-0188c2a7ff70>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# Switch back to the second window handle (the search-results page)\n",
    "driver.switch_to.window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Click 'Bulk download'\n",
    "driver.find_element('xpath', '//li[@class=\"bulkdownload export\"]').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-104-f0765e4dfd3b>:2: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[5])\n"
     ]
    }
   ],
   "source": [
    "# Switch to the sixth window handle (the bulk-download page)\n",
    "driver.switch_to.window(driver.window_handles[5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the selected articles (450 items)\n",
    "driver.find_element('id', 'btn-download-all').click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# for i in range(0,10):\n",
    "#     #获取当页内容\n",
    "#     element = driver.find_element_by_id('gridTable')\n",
    "#     page_html = element.get_attribute('innerHTML')\n",
    "#     pd.read_html(page_html)[0]\n",
    "#     # 全选\n",
    "#     element = driver.find_element_by_id('selectCheckAll1')\n",
    "#     element.click()\n",
    "#     Page_next = driver.find_element_by_id('PageNext')\n",
    "#     Page_next.click()\n",
    "#     time.sleep(8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# element_download = driver.find_element_by_xpath('//*[@id=\"batchOpsBox\"]/li[1]/a')\n",
    "# element_download.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-A5A95BCCCCAEC786BC66BFBA886324C2',\n",
       " 'CDwindow-EFF2CF69690CD3AD5797DC003C2DB7FA',\n",
       " 'CDwindow-B2F5CD09C7684CDBB58ED662C102600E']"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# #查看窗口信息（现在打开了三个窗口）\n",
    "# driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CDwindow-EFF2CF69690CD3AD5797DC003C2DB7FA'"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# driver.current_window_handle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-34-0dfe2a1ada8f>:1: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[2])\n"
     ]
    }
   ],
   "source": [
    "# driver.switch_to_window(driver.window_handles[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# download = driver.find_element_by_xpath('//*[@id=\"btn-download-all\"]')\n",
    "# download.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# driver.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-A5A95BCCCCAEC786BC66BFBA886324C2',\n",
       " 'CDwindow-EFF2CF69690CD3AD5797DC003C2DB7FA']"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# #查看窗口信息（现在打开了两个窗口）\n",
    "# driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-38-6c6d5ce6602d>:1: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# driver.switch_to_window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# clear = driver.find_element_by_xpath('//*[@id=\"gridTable\"]/div[1]/div[2]/div[1]/a')\n",
    "# clear.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for i in range(11,21):\n",
    "#     #获取当页内容\n",
    "#     element = driver.find_element_by_id('gridTable')\n",
    "#     page_html = element.get_attribute('innerHTML')\n",
    "#     pd.read_html(page_html)[0]\n",
    "#     # 全选\n",
    "#     element = driver.find_element_by_id('selectCheckAll1')\n",
    "#     element.click()\n",
    "#     Page_next = driver.find_element_by_id('PageNext')\n",
    "#     Page_next.click()\n",
    "#     time.sleep(8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# element_download = driver.find_element_by_xpath('//*[@id=\"batchOpsBox\"]/li[1]/a')\n",
    "# element_download.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-A5A95BCCCCAEC786BC66BFBA886324C2',\n",
       " 'CDwindow-EFF2CF69690CD3AD5797DC003C2DB7FA',\n",
       " 'CDwindow-19EB91DF32CCC56FFE8B58D89675351E']"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# #查看窗口信息（现在打开了三个窗口）\n",
    "# driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CDwindow-EFF2CF69690CD3AD5797DC003C2DB7FA'"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# driver.current_window_handle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-44-0dfe2a1ada8f>:1: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[2])\n"
     ]
    }
   ],
   "source": [
    "# driver.switch_to_window(driver.window_handles[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# download = driver.find_element_by_xpath('//*[@id=\"btn-download-all\"]')\n",
    "# download.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# driver.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-A5A95BCCCCAEC786BC66BFBA886324C2',\n",
       " 'CDwindow-EFF2CF69690CD3AD5797DC003C2DB7FA']"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# #查看窗口信息（现在打开了两个窗口）\n",
    "# driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-48-6c6d5ce6602d>:1: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# driver.switch_to_window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# clear = driver.find_element_by_xpath('//*[@id=\"gridTable\"]/div[1]/div[2]/div[1]/a')\n",
    "# clear.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for i in range(21,31):\n",
    "#     #获取当页内容\n",
    "#     element = driver.find_element_by_id('gridTable')\n",
    "#     page_html = element.get_attribute('innerHTML')\n",
    "#     pd.read_html(page_html)[0]\n",
    "#     # 全选\n",
    "#     element = driver.find_element_by_id('selectCheckAll1')\n",
    "#     element.click()\n",
    "#     Page_next = driver.find_element_by_id('PageNext')\n",
    "#     Page_next.click()\n",
    "#     time.sleep(8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# element_download = driver.find_element_by_xpath('//*[@id=\"batchOpsBox\"]/li[1]/a')\n",
    "# element_download.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-A5A95BCCCCAEC786BC66BFBA886324C2',\n",
       " 'CDwindow-EFF2CF69690CD3AD5797DC003C2DB7FA',\n",
       " 'CDwindow-0ABBAAE2A579AC296B790C7865B95CEE']"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# #查看窗口信息（现在打开了三个窗口）\n",
    "# driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CDwindow-EFF2CF69690CD3AD5797DC003C2DB7FA'"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# driver.current_window_handle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-54-0dfe2a1ada8f>:1: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[2])\n"
     ]
    }
   ],
   "source": [
    "# driver.switch_to_window(driver.window_handles[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# download = driver.find_element_by_xpath('//*[@id=\"btn-download-all\"]')\n",
    "# download.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# driver.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CDwindow-A5A95BCCCCAEC786BC66BFBA886324C2',\n",
       " 'CDwindow-EFF2CF69690CD3AD5797DC003C2DB7FA']"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# #查看窗口信息（现在打开了两个窗口）\n",
    "# driver.window_handles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-58-6c6d5ce6602d>:1: DeprecationWarning: use driver.switch_to.window instead\n",
      "  driver.switch_to_window(driver.window_handles[1])\n"
     ]
    }
   ],
   "source": [
    "# driver.switch_to_window(driver.window_handles[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "# clear = driver.find_element_by_xpath('//*[@id=\"gridTable\"]/div[1]/div[2]/div[1]/a')\n",
    "# clear.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# image_url = driver.find_element_by_id('changeVercode').get_attribute('src')\n",
    "# image_url"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from PIL import Image\n",
    "# from io import BytesIO\n",
    "# import base64\n",
    "# image_本地链接 = 'C:/Users/Desktop/1.jpg'\n",
    "# def baidu_API_OCR(image_url):\n",
    "    ## 1.获取百度API\n",
    "#      host = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general?access_token=24.f9ba9c5241b67688bb4adbed8bc91dec.2592000.1485570332.282335-8574074'\n",
    "#     response = requests.get(host)\n",
    "#     if response:\n",
    "#     #     print(response.json())\n",
    "#             access_token = response.json()[\"access_token\"]\n",
    "#         request_url = \"https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic\"\n",
    "#         params = {\n",
    "#             \"url\":image_url\n",
    "#         }\n",
    "#         request_url = request_url + \"?access_token=\" + access_token\n",
    "#         headers = {'content-type': 'application/x-www-form-urlencoded'}\n",
    "#         response = requests.post(request_url, data=params, headers=headers)\n",
    "#         results = response.json()\n",
    "#         response\n",
    "        \n",
    "#         request_url = \"https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic\"\n",
    "#     # 二进制方式打开图片文件\n",
    "#     f = open('[本地文件]', 'rb')\n",
    "#     img = base64.b64encode(f.read())\n",
    "\n",
    "#     params = {\"image\":img}\n",
    "#     access_token = '[调用鉴权接口获取的token]'\n",
    "#     request_url = request_url + \"?access_token=\" + access_token\n",
    "#     headers = {'content-type': 'application/x-www-form-urlencoded'}\n",
    "#     response = requests.post(request_url, data=params, headers=headers)\n",
    "#     if response:\n",
    "#         print (response.json())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# baidu_API_OCR(image_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for i in range(0,120):\n",
    "# #     if driver.find_element_by_id('changeVercode'):\n",
    "# #         # 解决验证码\n",
    "# #         # 调用API解决验证码的方法/fuc 返回一个结果result 就是我们的vercode\n",
    "# #         image_url = driver.find_element_by_id('changeVercode').get_attribute('src')\n",
    "\n",
    "# #         vercode = baidu_API_OCR()\n",
    "# #         driver.find_element_by_id('vericode').send_keys(vercode)\n",
    "# #         driver.find_element_by_id('checkCodeBtn').click()\n",
    "        \n",
    "# #     else:\n",
    "\n",
    "#         element = driver.find_element_by_id('PageNext')\n",
    "#         element.click()\n",
    "#         time.sleep(5)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
